library(tidyverse)
library(plotly)
load("wireless.rda")
# Overview map: every row of `wireless` at its (x, y) position, with the rows
# of `AP` overlaid in red.
plot(x = wireless$x, y = wireless$y)
points(x = AP$x, y = AP$y, cex = 1, col = "red")

# Relationship between distance and signal strength, using the first row of
# `AP` as the reference point.
# NOTE(review): `cutoff` is only assigned near the end of this file
# (cutoff = 68) and `distances2` further below, so this section does not run
# top-to-bottom as written -- confirm the intended execution order.
ap1 <- as.numeric(AP[1, ])
# Euclidean distance from each row's first two columns to `ap1`.
distances <- apply(
  wireless[, 1:2],
  1,
  function(pt) sqrt(sum((pt - ap1)^2))
)

# 2 x 2 panel: negated S1 signal against distance on several scales. The
# argument expressions are left untouched because the axis labels are derived
# from them.
par(mfrow = c(2, 2))
plot(x = -wireless$S1[distances > cutoff], y = distances[distances > cutoff]^2)
plot(y = 2*log(distances[distances > cutoff]), x = log(-wireless$S1[distances > cutoff]))
plot(x = -wireless$S1, y = distances)
plot(y = log(distances[distances > cutoff]), x = log(-wireless$S1[distances > cutoff]))

# Linear fit of the S2 signal on the log of `distances2`.
mod <- lm(wireless$S2 ~ log(distances2))
summary(mod)
Call:
lm(formula = wireless$S2 ~ log(distances2))
Residuals:
Min 1Q Median 3Q Max
-17.6911 -3.9331 -0.2897 3.8617 17.3204
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.2900 2.5687 1.67 0.0961 .
log(distances2) -18.1938 0.5488 -33.15 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.86 on 252 degrees of freedom
Multiple R-squared: 0.8135, Adjusted R-squared: 0.8127
F-statistic: 1099 on 1 and 252 DF, p-value: < 2.2e-16
TODO: repeat the same analysis for S2, S3, … as well
# Relationship between distance and signal strength, this time using the
# second row of `AP` as the reference point.
ap2 <- as.numeric(AP[2, ])
# Euclidean distance from each row's first two columns to `ap2`.
distances2 <- apply(
  wireless[, 1:2],
  1,
  function(pt) sqrt(sum((pt - ap2)^2))
)

# Same panels as for the first reference point, restricted to rows closer than
# `cutoff`. The argument expressions are left untouched because the axis
# labels are derived from them.
par(mfrow = c(2, 2))
plot(x = -wireless$S2[distances2 < cutoff], y = (distances2[distances2 < cutoff])^2)
plot(y = 2*log(distances2[distances2 < cutoff]), x = log(-wireless$S2[distances2 < cutoff]))
plot(x = -wireless$S2[distances2 < cutoff], y = distances2[distances2 < cutoff])
plot(y = log(distances2[distances2 < cutoff]), x = log(-wireless$S2[distances2 < cutoff]))
plot(y = -wireless$S2[distances2 < cutoff], x = (distances2)[distances2 < cutoff])

# Observation: for distances above 100 the linear relationship between signal
# and distance breaks down; many points sit at -92, the weakest signal seen.
# Linear fits of signal on distance, using only rows closer than `cutoff`.
mod2 <- lm(wireless$S2[distances2 < cutoff] ~ distances2[distances2 < cutoff])
mod1 <- lm(wireless$S1[distances < cutoff] ~ distances[distances < cutoff])
# Position offsets for row 223 relative to the row selected for it in
# `knn_predictions`, derived from the two coefficients of `mod`.
# NOTE(review): `mod` is assigned more than once in this file and `test2` is
# only created further below -- confirm which objects were live when this ran.
nn_223 <- knn_predictions[223]
mod_slope <- mod$coefficients[2]
mod_intercept <- mod$coefficients[1]

# Factor shared by dx and dy; evaluated left to right exactly as before:
# (column-4 difference / slope) * (column-4 value - intercept) / slope.
shared_factor <- (wireless[nn_223, 4] - wireless[223, 4]) / mod_slope *
  (wireless[223, 4] - mod_intercept) / mod_slope

# Divide by the neighbour's offset from the second AP along each axis.
dx <- shared_factor / (wireless[nn_223, 1] - AP$x[2])
dy <- shared_factor / (wireless[nn_223, 2] - AP$y[2])

# Element-wise reciprocal of `test2`, printed for inspection.
1 / test2
x y
[1,] -1.266179 -26.674175
[2,] 3.147891 -5.036626
[3,] 4.904649 2.239919
[4,] -1.533693 4.114786
[5,] -1.273203 8.885615
# Second (slope) and first (intercept) coefficient of the `mod2` fit.
k <- coef(mod2)[2]
b <- coef(mod2)[1]

# Signal columns of row 224.
signals <- wireless_feature[224, ]

# One row per row of `AP`: the first two columns of `wireless` row 224 minus
# that AP row.
test <- t(apply(AP, 1, function(ap) as.numeric(wireless[224, 1:2]) - ap))

# Scale each column of `test` by k^2 / (signals - b).
test2 <- apply(
  test,
  2,
  function(offset) as.numeric(k^2 / (signals - b)) * offset
)

# Regress the signal difference between rows 223 and 224 on the two scaled
# offset columns, without an intercept.
df_mod <- lm(
  as.numeric(wireless_feature[223, ] - wireless_feature[224, ]) ~
    0 + test2[, 1] + test2[, 2]
)
summary(df_mod)
Call:
lm(formula = as.numeric(wireless_feature[223, ] - wireless_feature[224,
]) ~ 0 + test2[, 1] + test2[, 2])
Residuals:
1 2 3 4 5
0.2427 5.0153 1.8677 -0.5638 2.7374
Coefficients:
Estimate Std. Error t value Pr(>|t|)
test2[, 1] -2.071 2.681 -0.773 0.496
test2[, 2] -3.237 6.455 -0.502 0.651
Residual standard error: 3.489 on 3 degrees of freedom
Multiple R-squared: 0.1886, Adjusted R-squared: -0.3523
F-statistic: 0.3487 on 2 and 3 DF, p-value: 0.7308
## Exploring differentials: differences of every other row against one
## randomly chosen reference row.
sample_index <- sample.int(nrow(wireless), 1)
sample_point <- wireless[sample_index, ]

# Subtract the reference row from every remaining row, column by column.
sample_diff <- data.frame(
  sweep(
    as.matrix(wireless[-sample_index, ]),
    2,
    as.numeric(wireless[sample_index, ])
  )
)

# Keep only rows whose y difference is exactly zero, then regress the
# x difference on all remaining columns except y.
sample_diff_y <- sample_diff[sample_diff$y == 0, ]
mod <- lm(x ~ . - y, data = sample_diff_y)
summary(mod)
Call:
lm(formula = x ~ . - y, data = sample_diff_y)
Residuals:
Min 1Q Median 3Q Max
-25.7877 -7.6092 -0.1254 9.2944 19.8359
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.6899 8.1992 0.084 0.9336
S1 -1.3331 0.6897 -1.933 0.0638 .
S2 3.7611 0.4953 7.593 3.62e-08 ***
S3 -1.5752 1.0351 -1.522 0.1397
S4 -0.7039 0.9135 -0.771 0.4477
S5 0.7194 0.4555 1.579 0.1259
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 11.43 on 27 degrees of freedom
Multiple R-squared: 0.9664, Adjusted R-squared: 0.9602
F-statistic: 155.3 on 5 and 27 DF, p-value: < 2.2e-16
# Print the value of `avg_error`; it is not assigned anywhere in the visible
# part of this file.
avg_error
[1] 17.20438
# Map of the (x, y) positions in `wireless`, with the fitted values of
# `basic_x` / `basic_y` overlaid in red and a blue segment joining each
# position to its fitted counterpart.
# NOTE(review): `basic_x` and `basic_y` are not defined in the visible part of
# this file -- presumably model fits for x and y; confirm.
plot(wireless$x, wireless$y)
points(basic_x$fitted.values, basic_y$fitted.values, col="red")
segments(wireless$x, wireless$y, basic_x$fitted.values, basic_y$fitted.values, col="blue")
# Leave-one-out nearest-neighbour search in signal space: for every row, find
# the index of the other row whose signal columns (3 to 7) are closest.
n <- nrow(wireless)
#train_percent = 0.6
#sample_indices = sample(1:nrow(wireless), train_percent*n)
wireless_feature <- wireless[, 3:7]

# Full pairwise distance matrix between all rows of the signal columns.
pwdistances <- as.matrix(dist(wireless_feature))

# Reuse the pairwise matrix instead of recomputing distances row by row.
# Setting the diagonal to Inf removes each row from its own candidate set;
# which.min() returns the first minimum, so ties go to the lowest row index.
# NOTE(review): this replaces a per-row pdist::pdist() call; if that function
# works at a different floating-point precision, near-ties could resolve
# differently -- compare against the previously saved knn_predictions.
loo_distances <- pwdistances
diag(loo_distances) <- Inf
knn_predictions <- as.numeric(unname(apply(loo_distances, 1, which.min)))
knn_predictions
[1] 24 23 6 156 6 181 101 37 36 11 10 10 14 182 14 86 86 16 18 24 19 102 2 25 24 27 26 30 31 28
[31] 32 31 31 33 186 37 36 49 43 253 49 46 39 253 49 42 79 83 41 51 50 80 65 57 56 54 54 59 60 59
[61] 62 61 64 63 53 93 66 63 46 71 70 83 74 73 74 77 81 77 76 52 77 204 72 95 106 107 147 108 119 100
[91] 74 65 66 103 84 106 107 108 89 90 239 22 94 240 95 85 86 98 119 120 127 144 33 29 161 135 97 98 89 110
[121] 175 127 144 149 153 133 166 130 160 128 154 133 126 135 134 135 138 137 138 165 165 131 123 123 147 147 146 150 124 148
[151] 152 151 125 131 153 157 156 159 160 129 129 163 162 135 140 127 146 177 173 166 176 168 169 191 179 191 168 188 175 191
[181] 6 10 86 27 99 35 195 178 234 179 174 178 198 221 187 212 249 241 219 204 238 233 233 236 222 207 206 206 211 211
[211] 209 213 212 196 217 222 215 217 217 247 194 216 224 223 226 225 229 241 227 229 244 244 203 235 234 204 213 201 101 104
[241] 228 247 44 232 249 250 242 252 197 246 254 248 40 251
# Predicted position of each row = coordinates of the row chosen for it in
# `knn_predictions`.
knn_x <- wireless[["x"]][knn_predictions]
knn_y <- wireless[["y"]][knn_predictions]

# Mean Euclidean distance between the actual and the predicted position.
knn_avg_error <- with(
  wireless,
  mean(sqrt((x - knn_x)^2 + (y - knn_y)^2))
)
knn_avg_error
[1] 10.97828
# Per-row Euclidean distance between actual and kNN-predicted position.
knn_errors <- sqrt((wireless$x - knn_x)^2 + (wireless$y - knn_y)^2)

# Side-by-side error densities: kNN prediction vs. the fitted values of
# `basic_x` / `basic_y`.
par(mfrow = c(1, 2))
plot(density(knn_errors), main = "knn performance")
plot(
  density(
    sqrt(
      (wireless$x - basic_x$fitted.values)^2 +
        (wireless$y - basic_y$fitted.values)^2
    )
  ),
  main = "regression performance"
)

# Map of actual positions, kNN-predicted positions in red, and a blue segment
# joining each actual position to its prediction.
plot(wireless$x, wireless$y)
points(knn_x, knn_y, col = "red")
segments(wireless$x, wireless$y, knn_x, knn_y, col = "blue")
# Repeatedly draw 5 random rows, treat their 5 signal columns as a 5 x 5
# system and solve it for the x and the y coordinates. Each successful solve
# contributes one row of coefficients to coef_x / coef_y.
num_iter <- 1000
coef_x <- matrix(-1, nrow = num_iter, ncol = 5)
coef_y <- matrix(-1, nrow = num_iter, ncol = 5)

# Number of draws that had to be discarded because solving failed (for
# example a singular system). Previously this counter was never updated.
singularities <- 0

# Upper bound on consecutive failed draws for one iteration, so that a
# persistent error (not just an unlucky singular draw) stops the script
# instead of looping forever.
max_attempts <- 1000

for (i in seq_len(num_iter)) {
  attempts <- 0
  repeat {
    attempts <- attempts + 1
    # On failure tryCatch returns the condition object instead of TRUE.
    solved <- tryCatch(
      {
        sample_indices <- sample(1:n, 5)
        temp_A <- wireless_feature[sample_indices, ]
        temp_bx <- wireless$x[sample_indices]
        temp_by <- wireless$y[sample_indices]
        temp_x <- solve(temp_A, temp_bx)
        temp_y <- solve(temp_A, temp_by)
        TRUE
      },
      error = function(cond) cond
    )
    if (!inherits(solved, "error")) {
      break
    }
    singularities <- singularities + 1
    if (attempts >= max_attempts) {
      stop(
        "no solvable 5-row sample found after ", max_attempts,
        " attempts; last error: ", conditionMessage(solved),
        call. = FALSE
      )
    }
  }
  coef_x[i, ] <- temp_x
  coef_y[i, ] <- temp_y
}
# Column-wise mean of a coefficient matrix, using only the entries of each
# column whose absolute value is below `limit`.
#
# coefs: numeric matrix, one column per coefficient.
# limit: entries with abs(value) >= limit are ignored.
# Returns a numeric vector with one trimmed mean per column.
trimmed_col_means <- function(coefs, limit = 20) {
  apply(coefs, 2, function(col) mean(col[abs(col) < limit]))
}

# One averaged coefficient per column of coef_x / coef_y.
# Fix: entries 3 to 5 used to average column 2 while filtering on columns
# 3 to 5; every entry now averages and filters the same column.
dx <- trimmed_col_means(coef_x)
dy <- trimmed_col_means(coef_y)

# Print `df_avg_error`; it is not assigned anywhere in the visible part of
# this file.
df_avg_error
[1] 13.66296
# Visual check on a reproducible random subset of 50 rows.
# The row count is taken from the data instead of the hard-coded 254, so the
# subset and the hover labels stay valid if `wireless` changes size.
set.seed(12345)
sample_indices <- sample(seq_len(nrow(wireless)), 50)

# Black: actual positions. Yellow: kNN-predicted positions. Red: kNN
# positions shifted by (df_x, df_y). Blue segments join actual to kNN
# positions, green segments join kNN to shifted positions.
# NOTE(review): `df_x` and `df_y` are not defined in the visible part of this
# file -- confirm where they come from.
plot(wireless$x[sample_indices], wireless$y[sample_indices], ylim = c(0, 145), xlim = c(10, 235))
points((knn_x - df_x)[sample_indices], (knn_y - df_y)[sample_indices], col = "red")
points(knn_x[sample_indices], knn_y[sample_indices], col = "yellow")
segments(
  wireless$x[sample_indices], wireless$y[sample_indices],
  knn_x[sample_indices], knn_y[sample_indices],
  col = "blue"
)
segments(
  knn_x[sample_indices], knn_y[sample_indices],
  (knn_x - df_x)[sample_indices], (knn_y - df_y)[sample_indices],
  col = "green"
)
#segments(wireless$x, wireless$y, knn_x+df_x, knn_y+ df_y, col="green")

# Interactive map: rows of `wireless` labelled with their row index plus
# `hover_text`, and the rows of `AP` labelled with their row names.
# NOTE(review): `hover_text` is not defined in the visible part of this file.
p <- plot_ly(
  wireless,
  x = ~x, y = ~y,
  name = "receivers", type = "scatter", mode = "markers",
  text = paste(seq_len(nrow(wireless)), "<br>", hover_text)
) %>%
  add_trace(x = AP$x, y = AP$y, name = "wifi post", mode = "markers", text = rownames(AP))
p
# k-means with 5 clusters on the signal columns (3 to 7).
# NOTE(review): kmeans() starts from random centres; the result depends on
# the RNG state at this point in the script.
kclusters <- kmeans(wireless[, 3:7], 5)
#kclusters$cluster

# Positions coloured directly by cluster number (no legend).
ggplot(data = wireless) +
  geom_point(aes(x = x, y = y), colour = kclusters$cluster)

# Same map with the cluster mapped to the colour aesthetic so a legend is
# drawn. The previous version passed the cluster vector positionally to
# scale_fill_manual() without `values` and without any fill aesthetic, so the
# clusters were never shown.
ggplot(data = wireless) +
  geom_point(aes(x = x, y = y, colour = factor(kclusters$cluster))) +
  scale_colour_discrete(name = "cluster")
cutoff <- 68
# A threshold around 70 looked like a reasonable cut-off (the value used here
# is 68): flag, per signal column, whether the signal is stronger than
# -cutoff.
wireless_strong <- wireless %>%
  mutate(
    S1 = S1 > -cutoff,
    S2 = S2 > -cutoff,
    S3 = S3 > -cutoff,
    S4 = S4 > -cutoff,
    S5 = S5 > -cutoff
  )

# The threshold turns out to be less useful than expected: tabulate how many
# strong signals each row has.
table(rowSums(wireless_strong[, 3:7]))

# Rows with fewer than two strong signals, plotted together with the AP
# positions (large red circles).
bad_locations <- wireless_strong[rowSums(wireless_strong[, 3:7]) < 2, ]
plot(x = bad_locations$x, y = bad_locations$y, ylim = c(0, 150), xlim = c(0, 230))
points(x = AP$x, y = AP$y, col = "red", cex = 5)

# Interactive look at how often each y value occurs.
View(data.frame(table(wireless$y)))